The aim of this task is data collection: my friends and I were considering launching a mobile application/startup serving pets in Egypt.
The goal is to list the pet shops along with their details (address, phone numbers, email, ratings, etc.).
This is done using the Google Maps APIs.
import googlemaps
import pprint
import time
import json
import pandas as pd
import plotly.express as px
import plotly.offline as pyo
# Set notebook mode to work in offline
pyo.init_notebook_mode()
# Adding your token
# NOTE(review): '<token>' is a placeholder; a real API key must be supplied
# here (ideally loaded from outside the notebook rather than committed).
gmaps = googlemaps.Client(key = '<token>')
# Exploratory text search for 'pet store' around the given "lat,lng" point
# (presumably central Cairo; radius presumably in metres — confirm against
# the googlemaps client documentation).
places_result = gmaps.places(query = 'pet store',location='30.033333,31.233334', radius=50000, open_now=False)
This is how the raw data looks in JSON format:
# Pretty-print the second result (index 1) to inspect the raw record structure.
pprint.pprint(places_result['results'][1])
{'business_status': 'OPERATIONAL',
'formatted_address': 'Inside Watanya fuel station, Omar Ibn El-Khattab, '
'Almazah, Heliopolis, Cairo Governorate, Egypt',
'geometry': {'location': {'lat': 30.07568929999999, 'lng': 31.3477586},
'viewport': {'northeast': {'lat': 30.07706057989272,
'lng': 31.34910717989272},
'southwest': {'lat': 30.07436092010728,
'lng': 31.34640752010728}}},
'icon': 'https://maps.gstatic.com/mapfiles/place_api/icons/v1/png_71/shopping-71.png',
'icon_background_color': '#4B96F3',
'icon_mask_base_uri': 'https://maps.gstatic.com/mapfiles/place_api/icons/v2/shopping_pinlet',
'name': '7Pets',
'opening_hours': {'open_now': True},
'photos': [{'height': 3456,
'html_attributions': ['<a '
'href="https://maps.google.com/maps/contrib/117516873709914055487">mahdy '
'mohammed</a>'],
'photo_reference': 'Aap_uEB635vKWTb7pjpdL4GSYSq1xqSNFFUg3LJBW8sDUZJoYVgfACwvrEbxbPkp1zch-8yEOHwZTK0e7YBMmqsw8WUuEJzrKgs22DelBbwIOfr8gNUWATjGd_u3G2sv0kFNP6RFWMN6Wzm0VXvjZjJSa3o0PES4Sh8NVNY89C6DPddn7C7A',
'width': 4608}],
'place_id': 'ChIJ77ze55g_WBQRKqlAqbV-B1c',
'plus_code': {'compound_code': '38GX+74 Heliopolis',
'global_code': '8G2H38GX+74'},
'rating': 4.7,
'reference': 'ChIJ77ze55g_WBQRKqlAqbV-B1c',
'types': ['pet_store', 'point_of_interest', 'store', 'establishment'],
'user_ratings_total': 6}
The code below groups the search terms into 3 tags/clusters, stored in a dictionary (tag → list of search terms).
Each tag has its own search terms; e.g. the pet shops tag contains: 'pet store', 'pet shop', 'Pet Food', 'Pet Supplies', 'Pet Accesories'.
For each tag I list every search term I can think of, then loop over each search term inside each tag.
The code also flattens the JSON data into the following columns: id, search_tag, search_term, name, address, location, district, rating, total_rating, status.
# Search tags (clusters) mapped to the free-text queries sent to the Places
# text search.
# NOTE(review): 'Pet Accesories' is misspelled; it is kept as-is so that the
# search_term labels stay comparable with previously collected data.
search_terms = {
    'pet shops': ['pet store', 'pet shop', 'Pet Food', 'Pet Supplies', 'Pet Accesories'],
    'pet shelters': ['pet shelter'],
    'vet': ['vet', 'veterinarians', 'pet clinic', 'pet health center'],
}

places_list = []  # one flat record per place that had all required fields
batch = 0         # running number of records kept
counter = 0       # running number of result pages processed

for srchtags, srchtrms in search_terms.items():
    for term, srchtrm in enumerate(srchtrms, start=1):
        places_result = gmaps.places(query=srchtrm, location='30.033333,31.233334',
                                     open_now=False, radius=40000)
        while True:
            # Process the current page *before* looking for a next-page token,
            # so the final page (which carries no token) is not discarded.
            for ele in places_result.get('results', []):
                try:
                    record = {
                        'id': str(ele['place_id']),
                        'search_tag': str(srchtags),
                        'search_term': str(srchtrm),
                        'name': str(ele['name']),
                        'address': str(ele['formatted_address']),
                        'location': str(ele['geometry']['location']),
                        # compound_code looks like '38GX+74 Heliopolis': drop
                        # the leading plus code and keep the locality text.
                        'district': str(ele['plus_code']['compound_code'].split(None, 1)[1]),
                        'rating': float(ele['rating']),
                        'total_rating': int(ele['user_ratings_total']),
                        'status': str(ele['business_status']),
                    }
                except (TypeError, KeyError, IndexError):
                    # Deliberate best-effort: a place missing any of the fields
                    # above (or with a compound_code that has no locality part)
                    # is skipped rather than stored half-empty.
                    continue
                places_list.append(record)
                batch += 1

            counter += 1
            print(srchtags ,"|", "Term: ",term," - ",srchtrm," - ","counter: ",counter, " - ","Batch: ",batch)

            next_page_token = places_result.get('next_page_token')
            if not next_page_token:
                break
            # A freshly issued page token is not usable immediately, so pause
            # briefly before requesting the next page.
            time.sleep(2)
            places_result = gmaps.places(query=srchtrm, page_token=next_page_token)

# Build the frame once from all records (DataFrame.append no longer exists in
# pandas 2.x); explicit columns keep the schema stable even with zero rows.
places_df = pd.DataFrame(
    places_list,
    columns=['id', 'search_tag', 'search_term', 'name', 'address',
             'location', 'district', 'rating', 'total_rating', 'status'],
)
pet shops | Term: 1 - pet store - counter: 1 - Batch: 19 pet shops | Term: 1 - pet store - counter: 2 - Batch: 37 pet shops | Term: 2 - pet shop - counter: 3 - Batch: 55 pet shops | Term: 2 - pet shop - counter: 4 - Batch: 73 pet shops | Term: 3 - Pet Food - counter: 5 - Batch: 91 pet shops | Term: 3 - Pet Food - counter: 6 - Batch: 111 pet shops | Term: 4 - Pet Supplies - counter: 7 - Batch: 129 pet shops | Term: 4 - Pet Supplies - counter: 8 - Batch: 148 pet shops | Term: 5 - Pet Accesories - counter: 9 - Batch: 166 pet shops | Term: 5 - Pet Accesories - counter: 10 - Batch: 184 pet shelters | Term: 1 - pet shelter - counter: 11 - Batch: 197 pet shelters | Term: 1 - pet shelter - counter: 12 - Batch: 215 vet | Term: 1 - vet - counter: 13 - Batch: 235 vet | Term: 1 - vet - counter: 14 - Batch: 255 vet | Term: 2 - veterinarians - counter: 15 - Batch: 274 vet | Term: 2 - veterinarians - counter: 16 - Batch: 294 vet | Term: 3 - pet clinic - counter: 17 - Batch: 313 vet | Term: 3 - pet clinic - counter: 18 - Batch: 333 vet | Term: 4 - pet health center - counter: 19 - Batch: 352 vet | Term: 4 - pet health center - counter: 20 - Batch: 371
# Preview the first five collected rows.
places_df.head(5)
| id | search_tag | search_term | name | address | location | district | rating | total_rating | status | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | ChIJ99GuTmY9WBQRfAYaCLGqMtE | pet shops | pet store | My Pet's Shop (متجر حيوانات أليفة) | 6 Hassan Ma'moon, Al Manteqah as Sadesah, Nasr... | {'lat': 30.0605233, 'lng': 31.3581079} | Nasr City | 4.9 | 11 | OPERATIONAL |
| 1 | ChIJoagxIXQ-WBQRd6mD72a3kTc | pet shops | pet store | Gnyora | شارع Moez Al Dawla, قسم اول، Nasr City, Cairo ... | {'lat': 30.060309, 'lng': 31.3500485} | Nasr City | 4.7 | 7 | OPERATIONAL |
| 2 | ChIJyVZHPnc9WBQRD5MFW8nH6rI | pet shops | pet store | Snopy Pet Store | 70 Hesham Labib, Al Manteqah Ath Thamenah, Nas... | {'lat': 30.05487209999999, 'lng': 31.3548689} | Nasr City | 4.9 | 17 | OPERATIONAL |
| 3 | ChIJR14zt7w_WBQRrG1eJGSvZK8 | pet shops | pet store | rus’Puppy | 15 St Ahmed Fakhry, Al Manteqah as Sadesah, Na... | {'lat': 30.0673209, 'lng': 31.3489937} | Nasr City | 4.3 | 24 | OPERATIONAL |
| 4 | ChIJnQTqROE9WBQR3gn2xiKbRbc | pet shops | pet store | Amazon Pet store | Al Madrasa Al Fanyah, An Nadi Al Ahli, Nasr Ci... | {'lat': 30.0677009, 'lng': 31.3589193} | Nasr City | 3.7 | 3 | OPERATIONAL |
Below is the number of rows collected for each search term.
# Number of collected rows per search term.
places_df.value_counts('search_term')
search_term vet 40 pet clinic 39 veterinarians 39 Pet Food 38 pet health center 38 Pet Supplies 37 pet store 37 Pet Accesories 36 pet shop 36 pet shelter 31 dtype: int64
I'll remove duplicate rows, since different search terms can return the same pet shop.
# Row count before de-duplication.
orig_size = len(places_df)
# NOTE: despite its name, uniq_size holds the number of rows whose id repeats
# an earlier row (the duplicates to be dropped), not the number of unique ids.
uniq_size = places_df.duplicated(subset='id').sum()
print('Original row count:', orig_size)
print('Number of duplicates: ', uniq_size)
print('Count after removal: ', orig_size - uniq_size)
Original row coun: 371 Number of duplicates: 175 Count after removal: 196
# Duplicate stores by id: rows whose id already appeared in an earlier row
# (the first occurrence of each id is not included), sorted by id.
places_df[places_df.id.duplicated()].sort_values('id').head(5)
| id | search_tag | search_term | name | address | location | district | rating | total_rating | status | |
|---|---|---|---|---|---|---|---|---|---|---|
| 61 | ChIJ-8QL2ms9WBQR9WQ85TQbGK4 | pet shops | pet shop | Amazon pets shop | محور مهدي عرفة،, Al Hay Al Asher, Nasr City, C... | {'lat': 30.0391933, 'lng': 31.37769029999999} | Nasr City | 4.3 | 3 | OPERATIONAL |
| 169 | ChIJ-9yq77s9WBQRxIoo4sauwg0 | pet shops | Pet Accesories | Amazon Pets Shop | Zahraa Nasr city - Masakin Al dobbaat, Al Hay ... | {'lat': 30.05003779999999, 'lng': 31.3841974} | Nasr City | 3.9 | 130 | OPERATIONAL |
| 142 | ChIJ-9yq77s9WBQRxIoo4sauwg0 | pet shops | Pet Supplies | Amazon Pets Shop | Zahraa Nasr city - Masakin Al dobbaat, Al Hay ... | {'lat': 30.05003779999999, 'lng': 31.3841974} | Nasr City | 3.9 | 130 | OPERATIONAL |
| 60 | ChIJ-9yq77s9WBQRxIoo4sauwg0 | pet shops | pet shop | Amazon Pets Shop | Zahraa Nasr city - Masakin Al dobbaat, Al Hay ... | {'lat': 30.05003779999999, 'lng': 31.3841974} | Nasr City | 3.9 | 130 | OPERATIONAL |
| 363 | ChIJ-cPOQShHWBQRtskLcbbibTo | vet | pet health center | Orange vet center عيادة بيطرية - طوارئ ٢٤ ساعة | 11 Gamal Salem, Mossadak, Dokki, Giza Governor... | {'lat': 30.038355, 'lng': 31.2022205} | Dokki | 4.6 | 26 | OPERATIONAL |
# Keep a single row per place id (the first occurrence is retained), in place.
places_df.drop_duplicates(subset=['id'],inplace=True)
places_df.shape
(196, 10)
Below, I will add the phone number (either mobile or landline) and the operating hours for each day of the week.
Note: this data requires fees to be paid, so I'd be careful here.
# Fetch the phone number and weekly opening hours for every unique place id.
places_details_list = []
for place in places_df['id']:
    # One Place Details request per place, restricted to the two needed fields.
    place_details = gmaps.place(place_id = place, fields = ['formatted_phone_number', 'opening_hours'])
    result = place_details.get('result') or {}
    phone_number = result.get('formatted_phone_number')
    weekday_text = (result.get('opening_hours') or {}).get('weekday_text')

    # Skip only places that returned neither detail. A place with a phone
    # number but no opening hours (or vice versa) is kept, with the missing
    # value left as None instead of the string 'None'.
    if phone_number is None and weekday_text is None:
        continue

    places_details_list.append(
        {'id': place,
         'phone_number': phone_number,
         'opening_hours': list(weekday_text) if weekday_text is not None else None
        }
    )

# Build the frame once (DataFrame.append no longer exists in pandas 2.x);
# explicit columns keep the later merge valid even when nothing was returned.
places_details_df = pd.DataFrame(places_details_list, columns=['id', 'phone_number', 'opening_hours'])

print('Original data:', len(places_df))
print('Detailed data:', len(places_details_df))
print('Difference:', len(places_df)- len(places_details_df), '- Empty without details')
Original data: 220 Detailed data: 171 Difference: 49 - Empty without details
# Preview the first ten detail rows.
places_details_df.head(10)
| id | phone_number | opening_hours | |
|---|---|---|---|
| 0 | ChIJ99GuTmY9WBQRfAYaCLGqMtE | 0120 102 2233 | [Monday: 2:00 PM – 12:00 AM, Tuesday: 2:00 PM ... |
| 1 | ChIJoagxIXQ-WBQRd6mD72a3kTc | 0106 764 4558 | [Monday: 11:00 AM – 12:00 AM, Tuesday: 11:00 A... |
| 2 | ChIJyVZHPnc9WBQRD5MFW8nH6rI | 0106 174 4427 | [Monday: 12:00 – 11:00 PM, Tuesday: 12:00 – 11... |
| 3 | ChIJR14zt7w_WBQRrG1eJGSvZK8 | 0102 200 9909 | [Monday: 11:00 AM – 10:00 PM, Tuesday: 1:00 – ... |
| 4 | ChIJnQTqROE9WBQR3gn2xiKbRbc | 0106 488 1118 | [Monday: 12:00 – 11:00 PM, Tuesday: 12:00 – 11... |
| 5 | ChIJ2Yw3T949WBQRuEpiH-fDQiA | 0102 430 0700 | [Monday: 12:00 PM – 12:00 AM, Tuesday: 12:00 P... |
| 6 | ChIJFxzcthA-WBQRlDpqHoZdZbg | 02 24159698 | [Monday: 10:00 AM – 11:00 PM, Tuesday: 10:00 A... |
| 7 | ChIJmUEW1lIUWBQRShFONmy3150 | 02 25992799 | [Monday: 9:00 AM – 11:00 PM, Tuesday: 9:00 AM ... |
| 8 | ChIJFS7MT_U-WBQRqobiIYYTGiQ | 0100 355 9543 | [Monday: 12:00 – 11:00 PM, Tuesday: 12:00 – 11... |
| 9 | ChIJe3QaHmY9WBQRPa_a6W_pEhI | 0100 049 9442 | [Monday: 11:00 AM – 11:00 PM, Tuesday: 11:00 A... |
# Left-join the details onto the places by id, so places without any detail
# row are kept (their detail columns are left empty).
places_df_final = places_df.merge(places_details_df, how='left', on='id')
places_df_final.shape
(196, 12)
# Number the rows from 1 instead of 0.
places_df_final.index = pd.RangeIndex(start=1, stop=len(places_df_final) + 1)
places_df_final.head(5)
| id | search_tag | search_term | name | address | location | district | rating | total_rating | status | phone_number | opening_hours | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | ChIJ99GuTmY9WBQRfAYaCLGqMtE | pet shops | pet store | My Pet's Shop (متجر حيوانات أليفة) | 6 Hassan Ma'moon, Al Manteqah as Sadesah, Nasr... | {'lat': 30.0605233, 'lng': 31.3581079} | Nasr City | 4.9 | 11 | OPERATIONAL | 0120 102 2233 | [Monday: 2:00 PM – 12:00 AM, Tuesday: 2:00 PM ... |
| 2 | ChIJoagxIXQ-WBQRd6mD72a3kTc | pet shops | pet store | Gnyora | شارع Moez Al Dawla, قسم اول، Nasr City, Cairo ... | {'lat': 30.060309, 'lng': 31.3500485} | Nasr City | 4.7 | 7 | OPERATIONAL | 0106 764 4558 | [Monday: 11:00 AM – 12:00 AM, Tuesday: 11:00 A... |
| 3 | ChIJyVZHPnc9WBQRD5MFW8nH6rI | pet shops | pet store | Snopy Pet Store | 70 Hesham Labib, Al Manteqah Ath Thamenah, Nas... | {'lat': 30.05487209999999, 'lng': 31.3548689} | Nasr City | 4.9 | 17 | OPERATIONAL | 0106 174 4427 | [Monday: 12:00 – 11:00 PM, Tuesday: 12:00 – 11... |
| 4 | ChIJR14zt7w_WBQRrG1eJGSvZK8 | pet shops | pet store | rus’Puppy | 15 St Ahmed Fakhry, Al Manteqah as Sadesah, Na... | {'lat': 30.0673209, 'lng': 31.3489937} | Nasr City | 4.3 | 24 | OPERATIONAL | 0102 200 9909 | [Monday: 11:00 AM – 10:00 PM, Tuesday: 1:00 – ... |
| 5 | ChIJnQTqROE9WBQR3gn2xiKbRbc | pet shops | pet store | Amazon Pet store | Al Madrasa Al Fanyah, An Nadi Al Ahli, Nasr Ci... | {'lat': 30.0677009, 'lng': 31.3589193} | Nasr City | 3.7 | 3 | OPERATIONAL | 0106 488 1118 | [Monday: 12:00 – 11:00 PM, Tuesday: 12:00 – 11... |
# Make the review count numeric so it sorts by value, not lexicographically.
places_df_final['total_rating'] = places_df_final['total_rating'].astype(int)
# Ten places with the most user ratings.
places_df_final.sort_values('total_rating', ascending=False).head(10)
| id | search_tag | search_term | name | address | location | district | rating | total_rating | status | phone_number | opening_hours | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 132 | ChIJy5F0FXI-WBQRLUiAS4Ww0oY | vet | vet | Happy Pets - Nasr City Branch | Hasan Ibrahim ,behind miffico helwan, 36 Makra... | {'lat': 30.066485, 'lng': 31.34378} | Nasr City | 4.1 | 647 | OPERATIONAL | 0120 120 4050 | [Monday: 10:00 AM – 12:00 AM, Tuesday: 10:00 A... |
| 58 | ChIJOVErAOA7WBQR3mOI3gSM08w | pet shops | Pet Supplies | 7 Pets | Al Nakhil, Al Hay Al Asher, Nasr City, Cairo G... | {'lat': 29.9815143, 'lng': 31.3541839} | Nasr City | 4.4 | 381 | OPERATIONAL | 0106 664 4044 | [Monday: 10:00 AM – 12:00 AM, Tuesday: 10:00 A... |
| 133 | ChIJT43rU30YWBQRs_8Ebnh698c | vet | vet | Alphavet Vet clinic | Second New Cairo, Cairo Governorate, Egypt | {'lat': 30.05976849999999, 'lng': 31.4910714} | Second New Cairo | 4.2 | 293 | OPERATIONAL | 02 26072594 | [Monday: 10:00 AM – 10:00 PM, Tuesday: 10:00 A... |
| 100 | ChIJT4YEeXNAWBQRmwgjR7IbJd0 | pet shelters | pet shelter | SPCA - Cairo | Abou Wafia, El-Zawya El-Hamraa, El Sharabiya, ... | {'lat': 30.097489, 'lng': 31.258012} | El Sharabiya | 3.7 | 289 | OPERATIONAL | 0111 552 2222 | [Monday: 9:30 AM – 2:00 PM, Tuesday: 9:30 AM –... |
| 152 | ChIJK6d9T-E9WBQRpEXpAhvQf8U | vet | veterinarians | Animals | An Nadi Al Ahli, Nasr City, Cairo Governorate,... | {'lat': 30.0675403, 'lng': 31.3591826} | Nasr City | 4.4 | 275 | OPERATIONAL | 02 24712324 | [Monday: 10:00 AM – 10:00 PM, Tuesday: 10:00 A... |
| 177 | ChIJPwYm9SdaWBQRU-FPa20W4kw | vet | pet health center | Pet Care Hospital - Dr Ahmed Ramadan | مول المعز- خلف الجامعه الكنديه CIC، Sheikh Zay... | {'lat': 30.0443241, 'lng': 30.9890684} | Sheikh Zayed City | 4.6 | 264 | OPERATIONAL | 0100 474 0482 | [Monday: Open 24 hours, Tuesday: Open 24 hours... |
| 148 | ChIJAWPGlF0-WBQRJtFyjxAdiSU | vet | vet | Pet Welfare Center | Al Manteqah Al Oula, Nasr City, Cairo Governor... | {'lat': 30.0352311, 'lng': 31.3435415} | Nasr City | 4.0 | 216 | OPERATIONAL | 0100 014 1455 | [Monday: Open 24 hours, Tuesday: Open 24 hours... |
| 79 | ChIJV_Rj2bhIWBQRp4gOY5FBAfA | pet shelters | pet shelter | ESMA Cats Shelter | Al Mansoureya Rd, Nazlet Al Ashtar, Giza Distr... | {'lat': 29.9439755, 'lng': 31.1871674} | Giza District | 4.0 | 203 | OPERATIONAL | NaN | NaN |
| 115 | ChIJCQAwvhE5WBQRyVX1opYXOqU | vet | vet | عيادة دكتور رامي عيسى البيطرية. Dr. Ramy Eissa... | عمارة ٢، شارع السعادة ،، El-Basatin Sharkeya, ... | {'lat': 29.9805189, 'lng': 31.3237094} | El Basatin | 4.5 | 199 | OPERATIONAL | 0106 009 9011 | [Monday: 9:00 AM – 11:00 PM, Tuesday: 9:00 AM ... |
| 10 | ChIJmUEW1lIUWBQRShFONmy3150 | pet shops | pet store | PetsEgypt Sheraton Heliopolis | قسم النزهة،, ٢ Khaled Ibn Al Walid, Sheraton A... | {'lat': 30.0998187, 'lng': 31.3729071} | El Nozha | 4.2 | 194 | OPERATIONAL | 02 25992799 | [Monday: 9:00 AM – 11:00 PM, Tuesday: 9:00 AM ... |
# Column dtypes and non-null counts of the merged table.
places_df_final.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 196 entries, 1 to 196 Data columns (total 12 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 id 196 non-null object 1 search_tag 196 non-null object 2 search_term 196 non-null object 3 name 196 non-null object 4 address 196 non-null object 5 location 196 non-null object 6 district 196 non-null object 7 rating 196 non-null float64 8 total_rating 196 non-null int32 9 status 196 non-null object 10 phone_number 153 non-null object 11 opening_hours 153 non-null object dtypes: float64(1), int32(1), object(10) memory usage: 17.7+ KB
# Summary statistics of the numeric columns, followed by the table shape.
# NOTE(review): `display` is not imported in this file — presumably the
# IPython/Jupyter built-in; confirm if this is ever run as a plain script.
display(places_df_final.describe())
print(places_df_final.shape)
| rating | total_rating | |
|---|---|---|
| count | 196.000000 | 196.000000 |
| mean | 3.888776 | 49.000000 |
| std | 1.579346 | 78.191678 |
| min | 0.000000 | 0.000000 |
| 25% | 4.000000 | 2.000000 |
| 50% | 4.500000 | 16.500000 |
| 75% | 4.800000 | 63.000000 |
| max | 5.000000 | 647.000000 |
(196, 12)
Below is the category distribution for each search tag. We have mostly vets, and more pet shops than shelters, which makes sense.
# Count places per search tag. rename_axis + reset_index(name=...) always
# yields the columns ['category', 'count'], independent of how the installed
# pandas version names the value_counts result (this changed in pandas 2.0).
ctgry_cnt = (
    places_df_final['search_tag']
    .value_counts()
    .rename_axis('category')
    .reset_index(name='count')
)
# Donut chart (hole=0.6) of the share of each category.
fig = px.pie(ctgry_cnt, values='count', names='category', title='Categories distribution', hole=0.6)
fig.update_layout(title_x=0.5)  # centre the title
fig.update_traces(textposition='inside', textinfo='value+percent', marker_colors = ["#ABDDDE","#F8AFA8","#FDDDA0"])
fig.show()
The below is the distribution by district.
# Number of places per district, smallest first (so the largest bar ends up
# at the top of the horizontal chart).
districts_cnt = (
    places_df_final.groupby('district')[['id']]
    .count()
    .rename(columns={'id': 'count'})
    .sort_values(by='count', ascending=True)
    .reset_index()
)
# Shorten the axis label to the text after the last comma of the district.
districts_cnt['district'] = districts_cnt['district'].str.split(',').str[-1]

fig = px.bar(
    districts_cnt,
    x='count',
    y='district',
    text='count',
    title='Search by districts count',
    orientation='h',
    height=650,
)
fig.update_layout(title_x=0.5)
fig.update_traces(
    marker_color='#ABDDDE',
    marker_line_color='rgb(8,48,107)',
    marker_line_width=1.5,
    opacity=0.6,
    textfont_size=20,
    textposition='outside',
    textfont_color='black',
)
fig.show()
This is a facet for the distribution by district for each search tag.
# Number of places per (search tag, district) pair, ordered by tag and then
# by ascending count.
districts_cnt = (
    places_df_final.groupby(['search_tag', 'district'])[['id']]
    .count()
    .rename(columns={'id': 'count'})
    .sort_values(by=['search_tag', 'count'], ascending=True)
    .reset_index()
)
# Shorten the axis label to the text after the last comma of the district.
districts_cnt['district'] = districts_cnt['district'].str.split(',').str[-1]

# One horizontal bar chart per search tag, two facets per row.
fig = px.bar(
    districts_cnt,
    x='count',
    y='district',
    facet_col='search_tag',
    facet_col_wrap=2,
    text='count',
    title='Search category by districts count',
    orientation='h',
    height=650,
)
# Facet titles read 'search_tag=<value>'; keep only the value.
fig.for_each_annotation(lambda note: note.update(text=note.text.split("=")[-1]))
# Give every facet its own visible, independently scaled axes.
fig.update_xaxes(showticklabels=True, matches=None)
fig.update_yaxes(showticklabels=True, matches=None)
fig.update_layout(title_x=0.5)
fig.update_traces(
    marker_color='#ABDDDE',
    marker_line_color='rgb(8,48,107)',
    marker_line_width=1.5,
    opacity=0.6,
    textfont_size=20,
    textposition='outside',
    textfont_color='black',
)
fig.show()
# Write the merged table (including its 1-based index) to CSV, overwriting any
# existing file. 'utf-8-sig' prepends a BOM — presumably so spreadsheet tools
# detect UTF-8 for the Arabic names and addresses.
places_df_final.to_csv('pet_shops_cleaned.csv', mode='w', encoding='utf-8-sig')